library(bayesrules)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Input combination E must have generated this plot. The prior places most of its mass on smaller values of pi, so alpha < beta; and the likelihood is symmetric about 0.5, so y/n = 1/2. The only input sequence that fits both features is E.
# Kimya's prior
plot_beta(1, 2)
# Fernando's prior
plot_beta(0.5, 1)
# Ciara's prior
plot_beta(3, 10)
# Taylor's prior
plot_beta(2, 0.1)
set.seed(84735)
# Simulate 10,000 draws of pi from Kimya's Beta(1, 2) prior, then a
# Binomial(7, pi) outcome y for each draw
kimya_sim <- data.frame(pi = rbeta(10000, 1, 2)) %>%
  mutate(y = rbinom(10000, size = 7, prob = pi))
Plotting the simulation results:
ggplot(kimya_sim, aes(x = pi, y = y)) +
  geom_point(aes(color = (y == 3)), size = 0.1)
Filtering for only the simulations that match our observed data (y = 3):
# Keep only the simulated pairs that match our data
kimya_posterior <- kimya_sim %>%
  filter(y == 3)
# Plot the remaining pi values
ggplot(kimya_posterior, aes(x = pi)) +
  geom_density()
Calculating the mean and standard deviation of pi among the simulations with y == 3:
kimya_posterior %>%
  summarize(mean(pi), sd(pi))
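As a sanity check on the simulation, we can compute the exact posterior moments in base R. This is a minimal sketch; it uses the conjugate posterior Beta(4, 6) that summarize_beta_binomial() confirms later in this document:
a_post <- 4
b_post <- 6
# Exact Beta(4, 6) posterior mean and sd, for comparison with the simulated values above
a_post / (a_post + b_post)
sqrt(a_post * b_post / ((a_post + b_post)^2 * (a_post + b_post + 1)))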
set.seed(84735)
# Simulate 10,000 draws of pi from Fernando's Beta(0.5, 1) prior, then a
# Binomial(7, pi) outcome y for each draw
fernando_sim <- data.frame(pi = rbeta(10000, 0.5, 1)) %>%
  mutate(y = rbinom(10000, size = 7, prob = pi))
Plotting the simulation results:
ggplot(fernando_sim, aes(x = pi, y = y)) +
  geom_point(aes(color = (y == 3)), size = 0.1)
Filtering for only the simulations that match our observed data (y = 3):
# Keep only the simulated pairs that match our data
fernando_posterior <- fernando_sim %>%
  filter(y == 3)
# Plot the remaining pi values
ggplot(fernando_posterior, aes(x = pi)) +
  geom_density()
Calculating the mean and standard deviation of pi among the simulations with y == 3:
fernando_posterior %>%
  summarize(mean(pi), sd(pi))
set.seed(84735)
# Simulate 10,000 draws of pi from Ciara's Beta(3, 10) prior, then a
# Binomial(7, pi) outcome y for each draw
ciara_sim <- data.frame(pi = rbeta(10000, 3, 10)) %>%
  mutate(y = rbinom(10000, size = 7, prob = pi))
Plotting the simulation results:
ggplot(ciara_sim, aes(x = pi, y = y)) +
  geom_point(aes(color = (y == 3)), size = 0.1)
Filtering for only the simulations that match our observed data (y = 3):
# Keep only the simulated pairs that match our data
ciara_posterior <- ciara_sim %>%
  filter(y == 3)
# Plot the remaining pi values
ggplot(ciara_posterior, aes(x = pi)) +
  geom_density()
Calculating the mean and standard deviation of pi among the simulations with y == 3:
ciara_posterior %>%
  summarize(mean(pi), sd(pi))
set.seed(84735)
# Simulate 10,000 draws of pi from Taylor's Beta(2, 0.1) prior, then a
# Binomial(7, pi) outcome y for each draw
taylor_sim <- data.frame(pi = rbeta(10000, 2, 0.1)) %>%
  mutate(y = rbinom(10000, size = 7, prob = pi))
Plotting the simulation results:
ggplot(taylor_sim, aes(x = pi, y = y)) +
  geom_point(aes(color = (y == 3)), size = 0.1)
Filtering for only the simulations that match our observed data (y = 3):
# Keep only the simulated pairs that match our data
taylor_posterior <- taylor_sim %>%
  filter(y == 3)
# Plot the remaining pi values
ggplot(taylor_posterior, aes(x = pi)) +
  geom_density()
Calculating the mean and standard deviation of pi among the simulations with y == 3:
taylor_posterior %>%
  summarize(mean(pi), sd(pi))
summarize_beta_binomial(1, 2, 3, 7)
Therefore, the posterior model is \[ Beta(4, 6)\]
The exact posterior mean is slightly lower than the simulated mean, and the exact standard deviation (0.148) is slightly below the simulated one (0.15).
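This follows from the conjugate update for the Beta-Binomial model: \[ \pi \mid y \sim Beta(\alpha + y, \beta + n - y) = Beta(1 + 3, 2 + 7 - 3) = Beta(4, 6) \]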
summarize_beta_binomial(0.5, 1, 3, 7)
Therefore, the posterior model is \[ Beta(3.5, 5)\]
The simulated and exact posterior means are very similar, and the exact standard deviation (0.1597) is slightly below the simulated one (0.165).
summarize_beta_binomial(3, 10, 3, 7)
Therefore, the posterior model is \[ Beta(6, 14)\]
The simulated mean (0.29) is slightly lower than the exact mean (0.3), and the exact variance (0.01) is slightly higher than the simulated variance (0.0099). Overall, though, the simulation very closely approximates the exact model.
summarize_beta_binomial(2, 0.1, 3, 7)
Therefore, the posterior model is \[ Beta(5, 4.1)\]
The simulated mean (0.53) is slightly lower than the exact mean (0.55), and the exact standard deviation (0.157) is slightly below the simulated one (0.16).
plot_beta_binomial(1, 4, 8, 10)
The data has more influence on the posterior: the posterior's shape, mean, and mode all track the likelihood curve more closely than the prior.
plot_beta_binomial(20, 3, 0, 1)
The prior has more influence on the posterior: the posterior's shape, mean, and mode all track the prior curve more closely than the likelihood.
plot_beta_binomial(4, 2, 1, 3)
The prior has slightly more influence on the posterior: the posterior's shape bears a slightly stronger resemblance to the prior curve, though its mean and mode lie roughly midway between the likelihood and the prior.
plot_beta_binomial(3, 10, 10, 13)
The posterior is an equal compromise between the data and the prior: it resembles the prior and likelihood curves equally.
plot_beta_binomial(20, 2, 10, 200)
The data has more influence on the posterior: the posterior's shape, mean, and mode all track the likelihood curve more closely than the prior.
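These comparisons reflect the fact that the posterior mean is a weighted average of the prior mean and the observed success rate, with weights set by the prior sample size (alpha + beta) and the data sample size n: \[ E(\pi \mid y) = \frac{\alpha + \beta}{\alpha + \beta + n} \cdot \frac{\alpha}{\alpha + \beta} + \frac{n}{\alpha + \beta + n} \cdot \frac{y}{n} \]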
I made the plots above as a means to answer Exercise 4.7.
Beta(7, 2) is a left-skewed model with most of its mass at high values of pi, so some reasonable values for pi would be between 0.7 and 0.9.
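As a rough base-R check (not part of the exercise), the quartiles of Beta(7, 2) bracket the middle 50% of the prior:
# Middle 50% of the Beta(7, 2) prior
qbeta(c(0.25, 0.75), shape1 = 7, shape2 = 2)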
I would shift the average value of pi even higher, closer to 1. This would increase my estimated mean value, and decrease the amount of variability relative to that hypothesized in the Beta(7, 2) model.
I would shift the average value of pi much lower, closer to 0. My estimated mean value would now fall within 0.2-0.4. This data would also slightly reduce the amount of variability relative to that hypothesized in the Beta(7, 2) model.
I would shift the average value of pi closer to 0.5. This data would also slightly decrease the amount of variability relative to that hypothesized in the Beta(7, 2) model.
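The next several code blocks recover the observed data from each prior/posterior pair. Since the Beta-Binomial conjugate update gives posterior parameters \[ \alpha' = \alpha + y, \quad \beta' = \beta + (n - y) \] we can back-solve for the data: \[ y = \alpha' - \alpha, \quad n = (\beta' - \beta) + y \]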
# Prior alpha and beta
a <- 0.5
b <- 0.5
# Posterior alpha and beta
a_1 <- 8.5
b_1 <- 2.5
y <- a_1 - a
n <- b_1 - b + y
y
## [1] 8
n
## [1] 10
Plotting the prior, likelihood and posterior models:
plot_beta_binomial(0.5, 0.5, 8, 10)
# Prior alpha and beta
a <- 0.5
b <- 0.5
# Posterior alpha and beta
a_1 <- 3.5
b_1 <- 10.5
y <- a_1 - a
n <- b_1 - b + y
y
## [1] 3
n
## [1] 13
Plotting the prior, likelihood and posterior models:
plot_beta_binomial(0.5, 0.5, 3, 13)
# Prior alpha and beta
a <- 10
b <- 1
# Posterior alpha and beta
a_1 <- 12
b_1 <- 15
y <- a_1 - a
n <- b_1 - b + y
y
## [1] 2
n
## [1] 16
Plotting the prior, likelihood and posterior models:
plot_beta_binomial(10, 1, 2, 16)
# Prior alpha and beta
a <- 8
b <- 3
# Posterior alpha and beta
a_1 <- 15
b_1 <- 6
y <- a_1 - a
n <- b_1 - b + y
y
## [1] 7
n
## [1] 10
Plotting the prior, likelihood and posterior models:
plot_beta_binomial(8, 3, 7, 10)
# Prior alpha and beta
a <- 2
b <- 2
# Posterior alpha and beta
a_1 <- 5
b_1 <- 5
y <- a_1 - a
n <- b_1 - b + y
y
## [1] 3
n
## [1] 6
Plotting the prior, likelihood and posterior models:
plot_beta_binomial(2, 2, 3, 6)
# Prior alpha and beta
a <- 1
b <- 1
# Posterior alpha and beta
a_1 <- 30
b_1 <- 3
y <- a_1 - a
n <- b_1 - b + y
y
## [1] 29
n
## [1] 31
Plotting the prior, likelihood and posterior models:
plot_beta_binomial(1, 1, 29, 31)
summarize_beta_binomial(1, 1, 10, 13)
Therefore, the posterior model is \[Beta(11, 4)\] Now we will plot the prior, likelihood and posterior curves:
plot_beta_binomial(1, 1, 10, 13)
summarize_beta_binomial(1, 1, 0, 1)
Therefore, the posterior model is \[Beta(1, 2)\] Now we will plot the prior, likelihood and posterior curves:
plot_beta_binomial(1, 1, 0, 1)
summarize_beta_binomial(1, 1, 100, 130)
Therefore, the posterior model is \[Beta(101, 31)\] Now we will plot the prior, likelihood and posterior curves:
plot_beta_binomial(1, 1, 100, 130)
summarize_beta_binomial(1, 1, 20, 120)
Therefore, the posterior model is \[Beta(21, 101)\] Now we will plot the prior, likelihood and posterior curves:
plot_beta_binomial(1, 1, 20, 120)
summarize_beta_binomial(1, 1, 234, 468)
Therefore, the posterior model is \[Beta(235, 235)\] Now we will plot the prior, likelihood and posterior curves:
plot_beta_binomial(1, 1, 234, 468)
summarize_beta_binomial(10, 2, 10, 13)
Therefore, the posterior model is \[Beta(20, 5)\] Now we will plot the prior, likelihood and posterior curves:
plot_beta_binomial(10, 2, 10, 13)
summarize_beta_binomial(10, 2, 0, 1)
Therefore, the posterior model is \[Beta(10, 3)\] Now we will plot the prior, likelihood and posterior curves:
plot_beta_binomial(10, 2, 0, 1)
summarize_beta_binomial(10, 2, 100, 130)
Therefore, the posterior model is \[Beta(110, 32)\] Now we will plot the prior, likelihood and posterior curves:
plot_beta_binomial(10, 2, 100, 130)
summarize_beta_binomial(10, 2, 20, 120)
Therefore, the posterior model is \[Beta(30, 102)\] Now we will plot the prior, likelihood and posterior curves:
plot_beta_binomial(10, 2, 20, 120)
summarize_beta_binomial(10, 2, 234, 468)
Therefore, the posterior model is \[Beta(244, 236)\] Now we will plot the prior, likelihood and posterior curves:
plot_beta_binomial(10, 2, 234, 468)
The politician's prior says the pdf of pi equals 2 for pi > 0.5 and 0 for pi < 0.5; in other words, he assigns zero probability to his support being below 50%. This doesn't really make sense to me…why is he restricting pi to values greater than 0.5 from the outset?
The posterior pdf can be constructed from the prior and the likelihood. Note that the posterior f(pi|y=0) = 0 for pi < 0.5. For pi > 0.5, we have the following model:
\[f(\pi \mid y=0) \propto f(\pi) L(\pi \mid y=0)\] Plugging into this formula, we have:
\[f(\pi \mid y=0) \propto 2 \binom{100}{0} \pi^{0} (1-\pi)^{100} = 2(1-\pi)^{100}\]
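Below is a sketch of the posterior pdf, rendered as a minimal ggplot of the unnormalized density (zero below 0.5, proportional to (1 - pi)^100 above it; the normalizing constant is omitted):
# Unnormalized posterior: 0 for pi < 0.5, 2 * (1 - pi)^100 for pi >= 0.5
post_unnorm <- function(pi) ifelse(pi < 0.5, 0, 2 * (1 - pi)^100)
ggplot(data.frame(pi = c(0, 1)), aes(x = pi)) +
  stat_function(fun = post_unnorm) +
  labs(x = "pi", y = "unnormalized posterior f(pi | y = 0)")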
Proof of equivalence:
As n increases, the posterior mode approaches zero. If we fix alpha, beta and y to equal 1, and allow n to vary from 1 to 10 to 100, we can see this trend:
a <- 1
b <- 1
y <- 1
n_0 <- 1
n_1 <- 10
n_2 <- 100
# Posterior mode of Beta(a + y, b + n - y): (a + y - 1) / (a + b + n - 2)
posterior_mode1 <- (a + y - 1) / (a + b + n_0 - 2)
posterior_mode1
## [1] 1
posterior_mode2 <- (a + y - 1) / (a + b + n_1 - 2)
posterior_mode2
## [1] 0.1
posterior_mode3 <- (a + y - 1) / (a + b + n_2 - 2)
posterior_mode3
## [1] 0.01
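In general, holding alpha, beta, and y fixed, \[ Mode(\pi \mid y) = \frac{\alpha + y - 1}{\alpha + \beta + n - 2} \rightarrow 0 \quad \text{as } n \rightarrow \infty \]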
Since we observed 1 success and 0 failures, we add one to alpha and 0 to beta, producing the following posterior model: \[ Beta(3, 3)\]
Since we observed 1 more success and 0 failures, we add one to alpha and 0 to beta, producing the following posterior model: \[ Beta(4, 3)\]
Since we observed 0 more successes and 1 more failure, we add zero to alpha and 1 to beta, producing the following posterior model: \[ Beta(4, 4)\]
Since we observed 1 more success and 0 more failures, we add one to alpha and 0 to beta, producing the following posterior model: \[ Beta(5, 4)\]
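As a quick check that these one-at-a-time updates match a single batch update, we can pool all four observations (y = 3 successes in n = 4 trials) and update once. This sketch assumes the Beta(2, 3) prior implied by the first update above:
# Batch update with the pooled data; should reproduce Beta(5, 4)
summarize_beta_binomial(alpha = 2, beta = 3, y = 3, n = 4)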
## Exercise 4.16

Since we observed 3 more successes and 2 more failures, we add 3 to alpha and 2 to beta, producing the following posterior model: \[ Beta(5, 5)\]
Since we observed 1 success and 4 more failures, we add 1 to alpha and 4 to beta, producing the following posterior model: \[ Beta(6, 9)\]
Since we observed 1 success and 4 more failures, we add 1 to alpha and 4 to beta, producing the following posterior model: \[ Beta(7, 13)\]
Since we observed 2 successes and 3 more failures, we add 2 to alpha and 3 to beta, producing the following posterior model: \[ Beta(9, 16)\]
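The same batch check works here (again assuming the Beta(2, 3) prior implied by the first day's update): the four days pool to y = 7 successes in n = 20 trials.
# Batch update with the pooled data; should reproduce Beta(9, 16)
summarize_beta_binomial(alpha = 2, beta = 3, y = 7, n = 20)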
plot_beta(4, 3)
The employees believe that, on average, about 57% of people (a prior mean of 4/7) will click on the ad, with a moderate level of variability.
For the first employee, with a sample size of one, and 0 successes and one failure, we can add 0 to alpha and 1 to beta, producing the following posterior model: \[ Beta(4, 4)\]
For the second employee, with a sample size of 10, and 3 successes and 7 failures, we can add 3 to alpha and 7 to beta, producing the following posterior model: \[ Beta(7, 10)\]

For the third employee, with a sample size of 100, and 20 successes and 80 failures, we can add 20 to alpha and 80 to beta, producing the following posterior model: \[ Beta(24, 83)\]

c. Plotting the prior, likelihood and posterior for the first employee:
plot_beta_binomial(4, 3, 0, 1)
Plotting the prior, likelihood and posterior for the second employee:
plot_beta_binomial(4, 3, 3, 10)
Plotting the prior, likelihood and posterior for the third employee:
plot_beta_binomial(4, 3, 20, 100)
uniform_Plot <- function(a, b) {
  xvals <- data.frame(x = c(a, b))  # Range for x-values
  ggplot(xvals, aes(x = x)) +
    xlim(c(a, b)) +
    ylim(0, 1 / (b - a)) +
    stat_function(fun = dunif, args = list(min = a, max = b), geom = "area",
                  fill = "green", alpha = 0.35) +
    stat_function(fun = dunif, args = list(min = a, max = b)) +
    labs(x = "\n u", y = "f(u) \n",
         title = paste0("Uniform Distribution \n With Min = ", a, " & Max = ", b, " \n")) +
    theme(plot.title = element_text(hjust = 0.5),
          axis.title.x = element_text(face = "bold", colour = "blue", size = 12),
          axis.title.y = element_text(face = "bold", colour = "blue", size = 12)) +
    geom_vline(xintercept = a, linetype = "dashed", colour = "red") +
    geom_vline(xintercept = b, linetype = "dashed", colour = "red")
}
uniform_Plot(a = 0.5, b = 1)